Pretest

Pretest results

This is the data we use to estimate the speaker optimality nad the age specific prior.

RSA model

Model utensils.

There are two types of objects, one used in the 3-object world of the RSA model (all_objects) and one used in the 2-object world of the prior (prior_objects).

rsaUtils <- '
var all_objects = [
{ shape: "triangle", id:1, location: 1},  
{ shape: "triangle", id:2, location: 2},
{ shape: "circle", id:1, location: 2}
]

var prior_objects = [
{ shape: "triangle", id:1, location: 1},  
{ shape: "circle", id:1, location: 2}
]

var labels = ["dax","wug"]

var lexicon1 = function(utterance, obj){
utterance.label == "dax" ? obj.shape == "triangle" :
utterance.label == "wug" ? obj.shape == "circle" : 
true
}

var lexicon2 = function(utterance, obj){
utterance.label == "dax" ? obj.shape == "circle" :
utterance.label == "wug" ? obj.shape == "triangle" : 
true
}

var lexiconObjects = {
"dax = triangle": {
triangle: "dax", circle: "wug"
},
"dax = circle": {
triangle: "wug", circle: "dax"
},
}

var lexiconObject = {
"dax = triangle": lexicon1,
"dax = circle" : lexicon2
}

var point = function(utterance, obj){
return obj.location == utterance.point
}


var utterancePrior = function(obj, lexiconName){
var locationsWithShape = _.map(_.filter(all_objects, {shape: obj.shape}), "location")
var point = uniformDraw(locationsWithShape)
var label = lexiconObjects[lexiconName][obj.shape]
return {label: label, point: point}
}

var LexiconPrior = Categorical({vs: ["dax = triangle","dax = circle" ], ps: [1, 1]})
'

RSA model code

rsaModel <- '
var literalListener = cache(function(utterance, priorProbs){
Infer({method: "enumerate", model: function(){
var lexiconName = sample(LexiconPrior); 
var lexicon = lexiconObject[lexiconName];
var obj = sample( Categorical({vs: all_objects, ps: priorProbs}));
if ("label" in utterance) {
 var truthValue = lexicon(utterance, obj);
 condition(truthValue)
}
if (utterance.point) {
 var truthValuePoint = point(utterance, obj);
 condition(truthValuePoint)
}
return obj.shape 
}})
})

var speaker = cache(function(obj, lexiconName, priorProbs, speakerOptimality){
Infer({method: "enumerate", model: function(){
var utterance = utterancePrior(obj, lexiconName);
var L0 = literalListener(utterance, priorProbs);
 factor(speakerOptimality * L0.score(obj.shape))
return utterance
}})
})

var pragmaticListener = cache(function(utterance, priorProbs, speakerOptimality){
Infer({method: "enumerate", model: function(){
var lexiconName = sample(LexiconPrior);
var obj = sample( Categorical({vs: all_objects, ps: priorProbs}));
var S1 = speaker(obj, lexiconName, priorProbs, speakerOptimality);
observe(S1, utterance)
return obj.shape == "circle" ? 1 : 0
}})
})


var addNoise = function(dist, noiseParam){
   Infer({model: function(){ 
      return flip(noiseParam) ? uniformDraw([0, 1]) : sample(dist)
    }
   })
}
'

Priors for intercept and slope for speaker optimality and preference

Sanity check for the priors on slope and intercept.

Speaker optimality

priorSoWebppl <- '
var infData = dataFromR
var priorProbs = [.5, .5, .5] 
var speakerOptimality = []
var model  = function(){
  var so_slope = uniformDrift({a: -2, b: 2, width: 0.4})
  var so_int = uniformDrift({a: -2, b: 2, width: 0.4})
    map(function(row){
      var age = row.age_num
      var speakerOptimality = so_int  + so_slope * (age - row.minage)
      var rsaPredictions = pragmaticListener({label: "dax", point: 2 }, priorProbs, speakerOptimality) 
      
      //display(JSON.stringify(row.minage))
    }, infData)
  return extend({so_int: so_int, so_slope: so_slope})
}
'
so_prior <- readRDS("../saves/so_prior.rds")
# so_prior<- webppl(
#   program_code = paste(rsaUtils, rsaModel, priorSoWebppl , sep='\n'),
#   data = inf_data%>%mutate(minage = min(age_num)), 
#   data_var = "dataFromR",
#   model_var = "model",
#   chains = 1,
#   inference_opts = list(method = "forward", samples = 1000, verbose = T)
# )
#saveRDS(so_prior, "../saves/so_prior.rds")

priorPred <- so_prior%>%
  spread(Parameter, value)%>%
  mutate(chain = factor(Chain))

so_prior <- so_prior%>%
  mutate(chain = factor(Chain))

mean_so_pre <-priorPred%>%
  summarise(int = mean(so_int),
            slope = mean(so_slope))


so_prior_plot <- ggplot(data = priorPred) +
  geom_abline(aes(intercept = so_int, slope = so_slope), col = "grey", alpha = 0.3)+
  geom_abline(data = mean_so_pre, aes(intercept = int, slope = slope), size = 0.4)+
  scale_x_continuous(limits=c(0, 2), name="age", breaks = c(0:2), labels = c(3:5)) +
  scale_y_continuous(limits=c(-4, 4), name="Speaker optimality")+
  ggtitle("Model speaker optimality prior")+
  coord_fixed(ratio=1/4)+
  theme_few()

Preference

priorPreWebppl <- '
var prefData = dataFromR
var priorProbs = [] 
var logistic = function(x) {1 / (1 + Math.exp(-x))}
var model  = function(){
  var pref_slope = uniformDrift({a: -2, b: 2, width: 0.4})
  var pref_int = uniformDrift({a: -2, b: 2, width: 0.4})
    map(function(row){
      var age = row.age_num
      var priorReg = logistic(pref_int + pref_slope * (age - row.minage))
      var priorProbs= [1-priorReg, priorReg]
      var priorPredictions = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: prior_objects, ps: priorProbs}));
      return obj.shape == "circle" ? 1 : 0
      }})
      
      // observe(priorPredictions, row.correct)
     // display(JSON.stringify(priorReg))
    }, prefData)
  return extend({pref_int: pref_int, pref_slope: pref_slope})
}
'
pref_prior_con <- readRDS("../saves/pref_prior_con.rds")

# pref_prior_con<- webppl(
#   program_code = paste(rsaUtils, rsaModel, priorPrefConWebppl , sep='\n'),
#   data = pref_data%>%mutate(minage = min(age_num)), 
#   data_var = "dataFromR",
#   model_var = "model",
#   chains = 1,
#   inference_opts = list(method = "forward", samples = 1000, verbose = T)
# )
 # saveRDS(pref_prior_con, "../saves/pref_prior_con.rds")

prefPriorPredCon <- pref_prior_con%>%
  spread(Parameter, value)

pref_prior_con <- pref_prior_con%>%
  mutate(chain = factor(Chain),
         alignment = "congruent")

x_plot <- seq(0, 2, by = 0.1)
model_predictions <- sapply(1:length(prefPriorPredCon$pref_int), function(idx) {
            plogis(prefPriorPredCon$pref_int[idx] + prefPriorPredCon$pref_slope[idx] * x_plot)
        })
colnames(model_predictions) <- 1:length(prefPriorPredCon$pref_int)
plot_pref_prior_con <- as.data.frame(cbind(x_plot, model_predictions))
plot_pref_prior_con <- melt(plot_pref_prior_con, id.vars = "x_plot", variable.name = "iteration", 
        value.name = "y_plot")

plot_prior_con_mean <- plot_pref_prior_con%>%
  group_by(x_plot)%>%
  summarise(y_plot = mean(y_plot))




pref_prior_plot <- ggplot(plot_pref_prior_con) + 
  geom_line(aes(x_plot, y_plot, group = iteration), col = "grey",alpha = 0.2) +
  #geom_ribbon(data = plot_prior_mean, aes(x =x_plot, ymin = ci_lower, ymax = ci_upper), fill = "red", alpha = 0.5) +
  geom_line(data = plot_prior_con_mean, aes(x_plot, y_plot), col = "black", size = 0.4) +
  xlab("age") + ylab("Proportion Expected Choice") +
  scale_x_continuous(limits=c(0,2), name="age", breaks = c(0:2), labels = c(3:5)) +
  scale_y_continuous(limits = c(0, 1), name="P in favor of preferred object")+
  ggtitle("Model preference prior")+
  coord_fixed(ratio=2)+
  theme_few()

There is no information in the priors. All structure in the models further down comes from the data

Model Predictions

Model code for predictions

modelPredWebppl <- '

var levels = function(df, label){
  return _.uniq(_.map(df, label));
}
var prefData = _.filter(dataFromR, {experiment: "preference_pretest"})
var infData = _.filter(dataFromR, {experiment: "informativeness_pretest"})

// make finer grained as needed
var binned_age_range = _.range(0, 2, 0.05)

var all_conditions = levels(prefData, "condition")

var foreach = function(fn, lst) {
    var foreach_ = function(i) {
        if (i < lst.length) {
            fn(lst[i]);
            foreach_(i + 1);
        }
    };
    foreach_(0);
};

var logistic = function(x) {1 / (1 + Math.exp(-x))}

var model  = function(){


  var so_slope = uniformDrift({a: -2, b: 2, width: 0.4})
  var so_int = uniformDrift({a: -2, b: 2, width: 0.4})

  foreach(function(row){

    var age = row.age_num

    var speakerOptimality = so_int  + so_slope * (age - infData[0].minage)
    
    var inf_priorProbs = [.5, .5, .5] 

    var rsaPredictions = pragmaticListener({label: "dax", point: 2 },
    inf_priorProbs, speakerOptimality) 
    
    observe(rsaPredictions, row.correct)

  }, infData)

  var pref_params = map(function(cndtn){

    var conditionData = _.filter(prefData, {condition: cndtn})
    
    var pref_slope = uniformDrift({a: -2, b: 2, width: 0.4})
    var pref_int = uniformDrift({a: -2, b: 2, width: 0.4})    
    
    foreach(function(row){

      var age = row.age_num

      var priorReg = logistic(pref_int + pref_slope * (age - row.minage))

      var priorProbs= [1-priorReg, priorReg]

      var priorPredictions = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: prior_objects, ps: priorProbs}));
      return obj.shape == "circle" ? 1 : 0
      }})
      
      observe(priorPredictions, row.correct)

    }, conditionData)
    
    return {pref_int, pref_slope, condition: cndtn}
    
  }, all_conditions)
  
  var diff_speaker_params = _.filter(pref_params, 
        {condition: "different_speaker"})[0]

  var same_speaker_params = _.filter(pref_params, 
        {condition: "same_speaker"})[0]
        
  var predictions_by_age = map(function(age_bin){
    
    var speakerOptimality = so_int  + so_slope * age_bin
    
    var diff_priorReg = logistic(diff_speaker_params.pref_int +
            diff_speaker_params.pref_slope * age_bin)

    var diff_priorProbs_congruent = [1-diff_priorReg, 1-diff_priorReg, diff_priorReg]

    var diff_priorProbs_incongruent = [diff_priorReg, diff_priorReg, 1-diff_priorReg]
      
    var rsaPredictions_diff_cong = expectation(pragmaticListener({label: "dax", point: 2 }, diff_priorProbs_congruent,   speakerOptimality))

    var rsaPredictions_diff_incong = expectation(pragmaticListener({label: "dax", point: 2 }, diff_priorProbs_incongruent, speakerOptimality))
      
    var same_priorReg = logistic(same_speaker_params.pref_int +
            same_speaker_params.pref_slope * age_bin)

    var same_priorProbs_congruent = [1-same_priorReg, 1-same_priorReg, same_priorReg]

    var same_priorProbs_incongruent = [same_priorReg, same_priorReg, 1-same_priorReg]

    var rsaPredictions_same_cong = expectation(pragmaticListener({label: "dax", point: 2 }, same_priorProbs_congruent, speakerOptimality))

    var rsaPredictions_same_incong = expectation(pragmaticListener({label: "dax", point: 2 }, same_priorProbs_incongruent, speakerOptimality))

    return extend(
    // diff_speaker_params, same_speaker_params, 
      {
      diff_priorReg,
      same_priorReg,
      speakerOptimality,
      rsaPredictions_diff_cong,
      rsaPredictions_diff_incong,
      rsaPredictions_same_cong,
      rsaPredictions_same_incong,
      age_bin,
      so_slope,
      so_int,

    })
    
  }, binned_age_range)
  
  return predictions_by_age
}
'

Sampling predictions

Taking samples and munging the data.

model_pred <- readRDS("../saves/kids_model_predictions.rds")

# model_pred<- webppl(
#   program_code = paste(rsaUtils, rsaModel, modelPredWebppl , sep='\n'),
#   data = pre_data,
#   data_var = "dataFromR",
#   model_var = "model",
#   chains = 4,
#   cores = 4,
#   inference_opts = list(method = "MCMC", samples = 900, burn = 200, verbose = T)
# )

#saveRDS(model_pred, file = "../saves/kids_model_predictions.rds")

kids_model_pred <- model_pred%>%
  select(value) %>%
  map_df(bind_rows)%>%
  mutate(iteration = rep(1:3600,each = 40))%>%
  select(-diff_priorReg,-same_priorReg,-speakerOptimality,-so_slope,-so_int)%>%
  gather(condition, prop_informative, -iteration,-age_bin)%>%
  separate(condition, into = c("model", "Speaker", "Alignment"), sep="_")%>%
  mutate(Speaker = ifelse(Speaker == "diff", "different_speaker","same_speaker"),
         Alignment = ifelse(Alignment == "cong", "congruent","incongruent"))


kids_model_pred_so <- model_pred%>%
  select(value) %>%
  map_df(bind_rows)%>%
  mutate(iteration = rep(1:3600,each = 40))%>%
  select(so_slope,so_int,age_bin,iteration,speakerOptimality)


kids_model_pred_prior <- model_pred%>%
  select(value) %>%
  map_df(bind_rows)%>%
  mutate(iteration = rep(1:3600,each = 40))%>%
  select(age_bin,iteration,diff_priorReg,same_priorReg)%>%
  gather(condition, prop_preferred, -iteration,-age_bin)%>%
  mutate(condition = ifelse(condition == "diff_priorReg", "different_speaker","same_speaker"))

Sanity checks

Model based speaker optimality by age

Here we estimate the speaker optimality parameter that best captures the data for the different ages. The model based estimate nicely captures the developmental trend.

Model based priors by age

The model accurately captures the the difference between conditions as well as the developmental trend we see in the data.

RSA Model Predictions

These are the predictions from the RSA model which takes in the age specific speaker optimality parameter and the age specific priors. The priors are converted from 2-object world into 3-object world before going into the model. This corresponds to the way we handled the priors for adults.

No Prior Model Predictions

Prior Only Model

priorOnlyWebppl <- '

var levels = function(df, label){
  return _.uniq(_.map(df, label));
}
var prefData = _.filter(dataFromR, {experiment: "preference_pretest"})

// make finer grained as needed
var binned_age_range = _.range(0, 2, 0.05)

var all_conditions = levels(prefData, "condition")

var foreach = function(fn, lst) {
    var foreach_ = function(i) {
        if (i < lst.length) {
            fn(lst[i]);
            foreach_(i + 1);
        }
    };
    foreach_(0);
};

var logistic = function(x) {1 / (1 + Math.exp(-x))}

var model  = function(){

  var pref_params = map(function(cndtn){

    var conditionData = _.filter(prefData, {condition: cndtn})
    
    var pref_slope = uniformDrift({a: -2, b: 2, width: 0.4})
    var pref_int = uniformDrift({a: -2, b: 2, width: 0.4})    
    
    foreach(function(row){

      var age = row.age_num

      var priorReg = logistic(pref_int + pref_slope * (age - row.minage))

      var priorProbs= [1-priorReg, priorReg]

      var priorPredictions = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: prior_objects, ps: priorProbs}));
      return obj.shape == "circle" ? 1 : 0
      }})
      
      observe(priorPredictions, row.correct)

    }, conditionData)
    
    return {pref_int, pref_slope, condition: cndtn}
    
  }, all_conditions)
  
  var diff_speaker_params = _.filter(pref_params, 
        {condition: "different_speaker"})[0]

  var same_speaker_params = _.filter(pref_params, 
        {condition: "same_speaker"})[0]
        
  var predictions_by_age = map(function(age_bin){
    
    
    var diff_priorReg = logistic(diff_speaker_params.pref_int +
            diff_speaker_params.pref_slope * age_bin)

    var diff_priorProbs_congruent = [1-diff_priorReg, 1-diff_priorReg, diff_priorReg]

    var diff_priorProbs_incongruent = [diff_priorReg, diff_priorReg, 1-diff_priorReg]
      

    var same_priorReg = logistic(same_speaker_params.pref_int +
            same_speaker_params.pref_slope * age_bin)

    var same_priorProbs_congruent = [1-same_priorReg, 1-same_priorReg, same_priorReg]

    var same_priorProbs_incongruent = [same_priorReg, same_priorReg, 1-same_priorReg]


 var priorPredictions_diff_con = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: all_objects, ps: diff_priorProbs_congruent}));
      return obj.shape == "circle" ? 1 : 0
      }})

  var priorPredictions_diff_incon = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: all_objects, ps: diff_priorProbs_incongruent}));
      return obj.shape == "circle" ? 1 : 0
      }})

 var priorPredictions_same_con = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: all_objects, ps: same_priorProbs_congruent}));
      return obj.shape == "circle" ? 1 : 0
      }})

  var priorPredictions_same_incon = Infer({method: "enumerate", model: function(){
      var obj = sample( Categorical({vs: all_objects, ps: same_priorProbs_incongruent}));
      return obj.shape == "circle" ? 1 : 0
      }})

     var prior_diff_con = Math.exp(priorPredictions_diff_con.score(1))
var prior_diff_incon = Math.exp(priorPredictions_diff_incon.score(1))
     var prior_same_con = Math.exp(priorPredictions_same_con.score(1))
var prior_same_incon = Math.exp(priorPredictions_same_incon.score(1))

    return extend(
      {prior_diff_con,
prior_diff_incon,
prior_same_con,
prior_same_incon,
    age_bin})

    
  }, binned_age_range)
  
  return predictions_by_age 
}
'
prior_only_predictions <- readRDS("../saves/prior_only_predictions.rds")

# prior_only_predictions<- webppl(
#   program_code = paste(rsaUtils, rsaModel, priorOnlyWebppl , sep='\n'),
#   data = pre_data,
#   data_var = "dataFromR",
#   model_var = "model",
#   chains = 3,
#   cores = 3,
#   inference_opts = list(method = "MCMC", samples = 1000, burn = 200, verbose = T)
# )

#saveRDS(prior_only_predictions, "../saves/prior_only_predictions.rds")


prior_only_model <- prior_only_predictions %>%
  select(value) %>%
  map_df(bind_rows)%>%
  mutate(iteration = rep(1:3000,each = 40))%>%
  gather(condition, prop_informative, -iteration,-age_bin)%>%
  separate(condition, into = c("model", "Speaker", "Alignment"), sep="_")%>%
  mutate(Speaker = ifelse(Speaker == "diff", "different_speaker","same_speaker"),
         Alignment = ifelse(Alignment == "con", "congruent","incongruent"))

Comparing model predictions

model_comp_mean_ci <- bind_rows(
plot_model_pred%>%ungroup()%>%mutate(model = "RSA"), 
plot_prior_only_pred%>%ungroup()%>%mutate(model = "Prior Only"),
no_prior_plot%>%ungroup()%>%mutate(model = "No Prior")
)

model_comp <- bind_rows(
kids_model_pred %>%mutate(model = "RSA"),
prior_only_model%>%mutate(model = "Prior Only"),
no_prior_pred%>%select(-so_slope,-so_int,-speakerOptimality)%>%mutate(model = "No Prior")
  )